This vignette assumes a SQL server at localhost (we
use PostgreSQL), with data in OMOP Common Data Model v5.4 format in
schema cdm_new_york3. The patient records shown in this
example are synthetic data from Synthea(TM) Patient
Generator.
library(phea)
library(dplyr)
# Connect to SQL server.
dbcon <- DBI::dbConnect(RPostgres::Postgres(),
host = 'localhost', port = 7654, dbname = 'fort',
user = cred$pg$user, password = cred$pg$pass)
# Call setup_phea so we can use sqlt() and sql0().
setup_phea(dbcon, 'cdm_new_york3')The Atherosclerotic cardiovascular disease (ASCVD) events Risk Estimator+ (ASCVD Risk Estimator Plus, or ASCVD+ for short) is a score developed by the American College of Cardiology/American Heart Association Task Force on Practice Guidelines for assessing the 10-year risk of an event. It was published in 12 Nov 2013 at https://doi.org/10.1161/01.cir.0000437741.48606.98.
To get a sense of what goes into the score, one can look at the
calculator provided by the American College of Cardiology:
The mathematical formula for the score is a bit complicated and is shown further down in this vignette.
To compute this phenotype, we first prepare the components, then the
formula parts, then we put it all together in
calculate_formula().
3004249 Loinc 8480-6 Systolic blood pressure
sbp <- sqlt(measurement) |>
filter(measurement_concept_id == 3004249) |>
make_component(
.pid = person_id,
.ts = measurement_datetime)3012888 Loinc 8462-4 Diastolic blood pressure
dbp <- sqlt(measurement) |>
filter(measurement_concept_id == 3012888) |>
make_component(
.pid = person_id,
.ts = measurement_datetime)3027114 Loinc 2093-3 Cholesterol [Mass/volume] in Serum or Plasma
tc <- sqlt(measurement) |>
filter(measurement_concept_id == 3027114) |>
make_component(
.pid = person_id,
.ts = measurement_datetime)3007070 Loinc 2085-9 Cholesterol in HDL [Mass/volume] in Serum or Plasma
hdl <- sqlt(measurement) |>
filter(measurement_concept_id == 3007070) |>
make_component(
.pid = person_id,
.ts = measurement_datetime)3009966 Loinc 18262-6 Cholesterol in LDL [Mass/volume] in Serum or Plasma by Direct assay
ldl <- sqlt(measurement) |>
filter(measurement_concept_id == 3009966) |>
make_component(
.pid = person_id,
.ts = measurement_datetime)201826 SNOMED 44054006 Type 2 diabetes mellitus
diabetes <- sqlt(condition_occurrence) |>
filter(condition_concept_id == 201826) |>
make_component(
.pid = person_id,
.ts = condition_start_datetime)43054909 LOINC 72166-2 Tobacco smoking status [Not required] 42709996 SNOMED 449868002 Smokes tobacco daily
smoker <- sqlt(observation) |>
filter(observation_concept_id %in% c(42709996)) |>
make_component(
.pid = person_id,
.ts = observation_datetime)ATC:
atc_concepts <- sqlt(concept) |>
filter(concept_code %in% c('C02', 'C03', 'C07', 'C08', 'C09') &&
vocabulary_id == 'ATC') |>
select(concept_id)
drug_concepts <- sqlt(concept_ancestor) |>
inner_join(atc_concepts, by = c('ancestor_concept_id' = 'concept_id')) |>
select(descendant_concept_id)
htt <- sqlt(drug_exposure) |>
inner_join(drug_concepts, by = c('drug_concept_id' = 'descendant_concept_id')) |>
make_component(
.pid = person_id,
.ts = drug_exposure_start_datetime)[Not required]
[Not required]
person <- sqlt(person) |>
make_component(
.pid = person_id,
.ts = birth_datetime)The ASCVD Risk Estimator+ formula can be a bit abstruse at first.
Here’s how the original paper phrased it: There are a three main parts here:
The truth is, the formula as written over there is slightly wrong. By checking the numbers from the paper, one can realize that the ASCVD+ formula is:
ascvd = (1-b) * exp(a-c)
We prepare each part a, b and c separately, then assemble them
together in calculate_formula().
The individual sum comes from multiplying the values of the
components by their coefficients as published in Table
A.:
Notice that the formula has 4 variations, according to whether the patient:
We use CASE WHEN ... constructs to prepare the formula
according to these 4 groups.
Let us first compute the individual sum using the coeficients according to each of the 4 groups.
# WHITE WOMEN
white_women_sum <- paste(sep = ' + ',
'-29.799 * ln(age)', # Ln Age (y) –29.799
'4.884 * ln(age)^2', # Ln Age, Squared 4.884
'13.540 * ln(tc_value_as_number)', # Ln Total Cholesterol (mg/dL) 13.540
'-3.114 * ln(age) * ln(tc_value_as_number)', # Ln Age × Ln Total Cholesterol –3.114
'-13.578 * ln(hdl_value_as_number)', # Ln HDL-C (mg/dL) –13.578
'3.149 * ln(age) * ln(hdl_value_as_number)', # Ln Age × Ln HDL-C 3.149
'case when htt is true then
2.019 * ln(sbp_value_as_number) else 0 end', # Ln Treated Systolic BP (mm Hg) 2.019
### # Ln Age × Ln Treated Systolic BP N/A
'case when htt is false then
1.957 * ln(sbp_value_as_number) else 0 end', # Ln Untreated Systolic BP (mm Hg) 1.957
### # Ln Age × Ln Untreated Systolic BP N/A
'7.574 * case when is_smoker then 1 else 0 end', # Current Smoker (1=Yes, 0=No) 7.574
'-1.665 * ln(age) * case when is_smoker then 1 else 0 end', # Ln Age × Current Smoker –1.665
'0.661 * case when has_diabetes then 1 else 0 end' # Diabetes (1=Yes, 0=No) 0.661
)
# BLACK WOMEN
black_women_sum <- paste(sep = ' + ',
# Ln Age (y) 17.114
'17.114 * ln(age)',
# Ln Age, Squared N/A
# Ln Total Cholesterol (mg/dL) 0.940
'0.940 * ln(tc_value_as_number)',
# Ln Age × Ln Total Cholesterol N/A
# Ln HDL-C (mg/dL) –18.920
'-18.920 * ln(hdl_value_as_number)',
# Ln Age × Ln HDL-C 4.475
'4.475 * ln(age) * ln(hdl_value_as_number)',
# Ln Treated Systolic BP (mm Hg) 29.291
'case when htt is true then
29.291 * ln(sbp_value_as_number) else 0 end',
# Ln Age × Ln Treated Systolic BP –6.432
'case when htt is true then
-6.432 * ln(age) * ln(sbp_value_as_number) else 0 end',
# Ln Untreated Systolic BP (mm Hg) 27.820
'case when htt is false then
27.820 * ln(sbp_value_as_number) else 0 end',
# Ln Age × Ln Untreated Systolic BP –6.087
'case when htt is false then
-6.087 * ln(age) * ln(sbp_value_as_number) else 0 end',
# Current Smoker (1=Yes, 0=No) 0.691
'0.691 * case when is_smoker then 1 else 0 end',
# Ln Age × Current Smoker N/A
# Diabetes (1=Yes, 0=No) 0.874
'0.874 * case when has_diabetes then 1 else 0 end'
)
# WHITE MEN
white_men_sum <- paste(sep = ' + ',
# Ln Age (y) 12.344
'12.344 * ln(age)',
# Ln Total Cholesterol (mg/dL) 11.853
'11.853 * ln(tc_value_as_number)',
# Ln Age × Ln Total Cholesterol –2.664
'-2.664 * ln(age) * ln(tc_value_as_number)',
# Ln HDL-C (mg/dL) –7.990
'-7.990 * ln(hdl_value_as_number)',
# Ln Age × Ln HDL-C 1.769
'1.769 * ln(age) * ln(hdl_value_as_number)',
# Ln Treated Systolic BP (mm Hg) 1.797
'case when htt is true then
1.797 * ln(sbp_value_as_number) else 0 end',
# Ln Untreated Systolic BP (mm Hg) 1.764
'case when htt is false then
1.764 * ln(sbp_value_as_number) else 0 end',
# Current Smoker (1=Yes, 0=No) 7.837
'7.837 * case when is_smoker then 1 else 0 end',
# Ln Age × Current Smoker –1.795
'-1.795 * ln(age) * case when is_smoker then 1 else 0 end',
# Diabetes (1=Yes, 0=No) 0.658
'0.658 * case when has_diabetes then 1 else 0 end'
)
# BLACK MEN
black_men_sum <- paste(sep = ' + ',
# Ln Age (y) 2.469
'2.469 * ln(age)',
# Ln Total Cholesterol (mg/dL) 0.30
'0.30 * ln(tc_value_as_number)',
# Ln Age × Ln Total Cholesterol N/A
# Ln HDL-C (mg/dL) –0.307
'-0.307 * ln(hdl_value_as_number)',
# Ln Age × Ln HDL-C N/A
# Ln Treated Systolic BP (mm Hg) 1.916
'case when htt is true then
1.916 * ln(sbp_value_as_number) else 0 end',
# Ln Untreated Systolic BP (mm Hg) 1.809
'case when htt is false then
1.809 * ln(sbp_value_as_number) else 0 end',
# Current Smoker (1=Yes, 0=No) 0.549
'0.549 * case when is_smoker then 1 else 0 end',
# Ln Age × Current Smoker N/A
# Diabetes (1=Yes, 0=No) 0.645
'0.645 * case when has_diabetes then 1 else 0 end'
)To facilitate reading and using the code, we assemble parts of the phenotype in separate formulas.
individual_sum <- paste0(
'case when is_woman then (
case when is_black then (', black_women_sum, ')
else (', white_women_sum, ')
end)
else (
case when is_black then (', black_men_sum, ')
else (', white_men_sum, ')
end)
end')
baseline_survival <- paste0(
'case when is_woman then (
case when is_black then 0.9533
else 0.9665
end)
else (
case when is_black then 0.8954
else 0.9144
end)
end')
group_mean <- paste0(
'case when is_woman then (
case when is_black then 86.61
else -29.18
end)
else (
case when is_black then 19.54
else 61.18
end)
end')Armed with the components and formula parts, we put it all together
in calculate_formula().
ascvd = calculate_formula(
components = list(
sbp = sbp,
dbp = dbp,
tc = tc,
hdl = hdl,
ldl = ldl,
diabetes = diabetes,
smoker = smoker,
htt = htt,
person = person),
export = c(
'sbp_value_as_number',
# 'dbp_value_as_number',
'tc_value_as_number',
'hdl_value_as_number',
# 'ldl_value_as_number',
'diabetes_condition_concept_id',
'diabetes_condition_start_datetime',
'smoker_observation_datetime',
'htt_drug_concept_id',
'htt_drug_exposure_start_datetime',
'person_birth_datetime'),
fml = list(
preprocess = list(
is_woman = 'person_gender_concept_id = 8532',
is_black = 'person_race_concept_id = 8516',
age = 'extract(year from age(ts, person_birth_datetime))',
is_smoker = 'smoker_observation_datetime is null',
has_diabetes = 'diabetes_condition_start_datetime is null',
htt_sbp_dist = "case when htt_drug_exposure_start_datetime - sbp_measurement_datetime < interval '0'
then sbp_measurement_datetime - htt_drug_exposure_start_datetime
else htt_drug_exposure_start_datetime - sbp_measurement_datetime end"
),
htt = "htt_drug_exposure_start_datetime is not null
and htt_sbp_dist < '4 months'", # SBP and HTT within 4 months of each other
individual_sum = individual_sum,
baseline_survival = baseline_survival,
group_mean = group_mean,
calculated_risk = '(1 - baseline_survival) * exp(individual_sum - group_mean)',
estimated_risk = 'least(0.3, greatest(0.01, calculated_risk))' # Cap to 1%-30%.
),
.dont_require = c('smoker', 'htt', 'diabetes'),
.out_window = c('person', 'diabetes'))Let us peek at 20 rows from the phenotype.
ascvd |>
head_shot(20, blind = TRUE) |>
kable()| row_id | pid | ts | window | person_gender_concept_id | person_race_concept_id | person_birth_datetime | smoker_observation_datetime | diabetes_condition_start_datetime | htt_drug_exposure_start_datetime | sbp_measurement_datetime | tc_value_as_number | hdl_value_as_number | sbp_value_as_number | diabetes_condition_concept_id | htt_drug_concept_id | is_woman | is_black | age | is_smoker | has_diabetes | htt_sbp_dist | htt | individual_sum | baseline_survival | group_mean | calculated_risk | estimated_risk |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 31 | 1 | 2005-05-13 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | NA | NA | 2005-05-13 | 199.0 | 79.1 | 121 | NA | NA | FALSE | FALSE | 31 | TRUE | TRUE | NA | FALSE | 59.12642 | 0.9144 | 61.18 | 0.0109803 | 0.0109803 |
| 21 | 1 | 2006-05-12 | 364 days | 8507 | 8527 | 1974-03-01 | NA | NA | NA | 2006-05-12 | 199.0 | 79.1 | 121 | NA | NA | FALSE | FALSE | 32 | TRUE | TRUE | NA | FALSE | 59.25911 | 0.9144 | 61.18 | 0.0125384 | 0.0125384 |
| 12 | 1 | 2008-05-16 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | NA | NA | 2008-05-16 | 182.2 | 73.8 | 117 | NA | NA | FALSE | FALSE | 34 | TRUE | TRUE | NA | FALSE | 59.35782 | 0.9144 | 61.18 | 0.0138392 | 0.0138392 |
| 33 | 1 | 2011-05-20 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2011-05-20 | 183.1 | 60.7 | 134 | 201826 | NA | FALSE | FALSE | 37 | TRUE | FALSE | NA | FALSE | 59.62615 | 0.9144 | 61.18 | 0.0180986 | 0.0180986 |
| 34 | 1 | 2014-03-07 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2014-03-07 | 209.9 | 43.5 | 108 | 201826 | NA | FALSE | FALSE | 40 | TRUE | FALSE | NA | FALSE | 60.31688 | 0.9144 | 61.18 | 0.0361099 | 0.0361099 |
| 5 | 1 | 2016-03-11 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2016-03-11 | 224.1 | 57.3 | 123 | 201826 | NA | FALSE | FALSE | 42 | TRUE | FALSE | NA | FALSE | 60.43606 | 0.9144 | 61.18 | 0.0406805 | 0.0406805 |
| 16 | 1 | 2018-03-16 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2018-03-16 | 221.5 | 52.7 | 111 | 201826 | NA | FALSE | FALSE | 44 | TRUE | FALSE | NA | FALSE | 60.49591 | 0.9144 | 61.18 | 0.0431896 | 0.0431896 |
| 46 | 1 | 2020-03-20 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2020-03-20 | 205.0 | 55.5 | 127 | 201826 | NA | FALSE | FALSE | 46 | TRUE | FALSE | NA | FALSE | 60.68360 | 0.9144 | 61.18 | 0.0521065 | 0.0521065 |
| 8 | 1 | 2022-02-11 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2022-02-11 | 212.2 | 46.6 | 130 | 201826 | NA | FALSE | FALSE | 47 | TRUE | FALSE | NA | FALSE | 61.06067 | 0.9144 | 61.18 | 0.0759716 | 0.0759716 |
| 9 | 1 | 2022-03-25 | 00:00:00 | 8507 | 8527 | 1974-03-01 | NA | 2011-05-20 | NA | 2022-03-25 | 204.0 | 51.4 | 109 | 201826 | NA | FALSE | FALSE | 48 | TRUE | FALSE | NA | FALSE | 60.64193 | 0.9144 | 61.18 | 0.0499794 | 0.0499794 |
| 104 | 2 | 2004-07-17 | 00:00:00 | 8507 | 8527 | 1949-10-29 | NA | NA | 2004-07-17 | 2004-07-17 | 199.8 | 61.4 | 178 | NA | 19073094 | FALSE | FALSE | 54 | TRUE | TRUE | 00:00:00 | TRUE | 62.53925 | 0.9144 | 61.18 | 0.3332655 | 0.3000000 |
| 79 | 2 | 2005-07-23 | 371 days | 8507 | 8527 | 1949-10-29 | NA | NA | 2005-07-23 | 2005-07-23 | 199.8 | 61.4 | 151 | NA | 19073094 | FALSE | FALSE | 55 | TRUE | TRUE | 00:00:00 | TRUE | 62.31191 | 0.9144 | 61.18 | 0.2654953 | 0.2654953 |
| 153 | 2 | 2006-07-29 | 742 days | 8507 | 8527 | 1949-10-29 | NA | NA | 2006-07-29 | 2006-07-29 | 199.8 | 61.4 | 157 | NA | 19073094 | FALSE | FALSE | 56 | TRUE | TRUE | 00:00:00 | TRUE | 62.44897 | 0.9144 | 61.18 | 0.3044964 | 0.3000000 |
| 154 | 2 | 2007-08-04 | 00:00:00 | 8507 | 8527 | 1949-10-29 | NA | NA | 2007-08-04 | 2007-08-04 | 186.4 | 71.2 | 148 | NA | 19073094 | FALSE | FALSE | 57 | TRUE | TRUE | 00:00:00 | TRUE | 62.20954 | 0.9144 | 61.18 | 0.2396606 | 0.2396606 |
| 62 | 2 | 2008-08-09 | 371 days | 8507 | 8527 | 1949-10-29 | NA | NA | 2008-08-09 | 2008-08-09 | 186.4 | 71.2 | 171 | NA | 19073094 | FALSE | FALSE | 58 | TRUE | TRUE | 00:00:00 | TRUE | 62.54160 | 0.9144 | 61.18 | 0.3340476 | 0.3000000 |
| 63 | 2 | 2009-08-15 | 742 days | 8507 | 8527 | 1949-10-29 | NA | NA | 2009-08-15 | 2009-08-15 | 186.4 | 71.2 | 142 | NA | 19073094 | FALSE | FALSE | 59 | TRUE | TRUE | 00:00:00 | TRUE | 62.27889 | 0.9144 | 61.18 | 0.2568718 | 0.2568718 |
| 84 | 2 | 2010-08-21 | 00:00:00 | 8507 | 8527 | 1949-10-29 | NA | NA | 2010-08-21 | 2010-08-21 | 173.6 | 60.4 | 182 | NA | 19073094 | FALSE | FALSE | 60 | TRUE | TRUE | 00:00:00 | TRUE | 62.85054 | 0.9144 | 61.18 | 0.4549675 | 0.3000000 |
| 65 | 2 | 2011-08-27 | 371 days | 8507 | 8527 | 1949-10-29 | NA | NA | 2011-08-27 | 2011-08-27 | 173.6 | 60.4 | 185 | NA | 19073094 | FALSE | FALSE | 61 | TRUE | TRUE | 00:00:00 | TRUE | 62.94713 | 0.9144 | 61.18 | 0.5011044 | 0.3000000 |
| 159 | 2 | 2012-09-01 | 742 days | 8507 | 8527 | 1949-10-29 | NA | NA | 2012-09-01 | 2012-09-01 | 173.6 | 60.4 | 155 | NA | 19073094 | FALSE | FALSE | 62 | TRUE | TRUE | 00:00:00 | TRUE | 62.69530 | 0.9144 | 61.18 | 0.3895477 | 0.3000000 |
| 107 | 2 | 2013-09-07 | 00:00:00 | 8507 | 8527 | 1949-10-29 | NA | NA | 2013-09-07 | 2013-09-07 | 176.2 | 77.5 | 150 | NA | 19073094 | FALSE | FALSE | 63 | TRUE | TRUE | 00:00:00 | TRUE | 62.54883 | 0.9144 | 61.18 | 0.3364732 | 0.3000000 |
To make a visually interesting plot, let us pick a patient at random, but from among those that have the most data points.
patients_with_most_data <- list(ascvd = ascvd) |>
sqla('
select pid, count(*) as rows
from ascvd
group by pid
order by rows desc
limit 20
') |>
pull('pid')From the 20 patients_with_most_data, we pick one at
random.
random_patient <- sample(patients_with_most_data, 1)
message('Sampled patient: ', random_patient)
#> Sampled patient: 4932And then we plot the chosen patient.
ascvd |>
select(-ends_with('datetime'), -ends_with('concept_id'), -htt_sbp_dist,
-group_mean, -individual_sum, -calculated_risk, -baseline_survival) |>
phea_plot(random_patient, verbose = FALSE)To see the SQL query that computes the phenotype, use helper function
code_shot(), or dbplyr::sql_render(), or the
.clip_sql option in calculate_formula().
code_shot(ascvd)SELECT *, least(0.3, greatest(0.01, calculated_risk)) AS "estimated_risk"
FROM (
SELECT
*,
(1 - baseline_survival) * exp(individual_sum - group_mean) AS "calculated_risk"
FROM (
SELECT
*,
case when is_woman then (
case when is_black then 86.61
else -29.18
end)
else (
case when is_black then 19.54
else 61.18
end)
end AS "group_mean"
FROM (
SELECT
*,
case when is_woman then (
case when is_black then 0.9533
else 0.9665
end)
else (
case when is_black then 0.8954
else 0.9144
end)
end AS "baseline_survival"
FROM (
SELECT
*,
case when is_woman then (
case when is_black then (17.114 * ln(age) + 0.940 * ln(tc_value_as_number) + -18.920 * ln(hdl_value_as_number) + 4.475 * ln(age) * ln(hdl_value_as_number) + case when htt is true then
29.291 * ln(sbp_value_as_number) else 0 end + case when htt is true then
-6.432 * ln(age) * ln(sbp_value_as_number) else 0 end + case when htt is false then
27.820 * ln(sbp_value_as_number) else 0 end + case when htt is false then
-6.087 * ln(age) * ln(sbp_value_as_number) else 0 end + 0.691 * case when is_smoker then 1 else 0 end + 0.874 * case when has_diabetes then 1 else 0 end)
else (-29.799 * ln(age) + 4.884 * ln(age)^2 + 13.540 * ln(tc_value_as_number) + -3.114 * ln(age) * ln(tc_value_as_number) + -13.578 * ln(hdl_value_as_number) + 3.149 * ln(age) * ln(hdl_value_as_number) + case when htt is true then
2.019 * ln(sbp_value_as_number) else 0 end + case when htt is false then
1.957 * ln(sbp_value_as_number) else 0 end + 7.574 * case when is_smoker then 1 else 0 end + -1.665 * ln(age) * case when is_smoker then 1 else 0 end + 0.661 * case when has_diabetes then 1 else 0 end)
end)
else (
case when is_black then (2.469 * ln(age) + 0.30 * ln(tc_value_as_number) + -0.307 * ln(hdl_value_as_number) + case when htt is true then
1.916 * ln(sbp_value_as_number) else 0 end + case when htt is false then
1.809 * ln(sbp_value_as_number) else 0 end + 0.549 * case when is_smoker then 1 else 0 end + 0.645 * case when has_diabetes then 1 else 0 end)
else (12.344 * ln(age) + 11.853 * ln(tc_value_as_number) + -2.664 * ln(age) * ln(tc_value_as_number) + -7.990 * ln(hdl_value_as_number) + 1.769 * ln(age) * ln(hdl_value_as_number) + case when htt is true then
1.797 * ln(sbp_value_as_number) else 0 end + case when htt is false then
1.764 * ln(sbp_value_as_number) else 0 end + 7.837 * case when is_smoker then 1 else 0 end + -1.795 * ln(age) * case when is_smoker then 1 else 0 end + 0.658 * case when has_diabetes then 1 else 0 end)
end)
end AS "individual_sum"
FROM (
SELECT
*,
htt_drug_exposure_start_datetime is not null
and htt_sbp_dist < '4 months' AS "htt"
FROM (
SELECT
"row_id",
"pid",
"ts",
"window",
"person_gender_concept_id",
"person_race_concept_id",
"person_birth_datetime",
"smoker_observation_datetime",
"diabetes_condition_start_datetime",
"htt_drug_exposure_start_datetime",
"sbp_measurement_datetime",
"tc_value_as_number",
"hdl_value_as_number",
"sbp_value_as_number",
"diabetes_condition_concept_id",
"htt_drug_concept_id",
person_gender_concept_id = 8532 AS "is_woman",
person_race_concept_id = 8516 AS "is_black",
extract(year from age(ts, person_birth_datetime)) AS "age",
smoker_observation_datetime is null AS "is_smoker",
diabetes_condition_start_datetime is null AS "has_diabetes",
case when htt_drug_exposure_start_datetime - sbp_measurement_datetime < interval '0'
then sbp_measurement_datetime - htt_drug_exposure_start_datetime
else htt_drug_exposure_start_datetime - sbp_measurement_datetime end AS "htt_sbp_dist"
FROM (
SELECT
*,
"ts" - least(sbp_ts, tc_ts, hdl_ts, smoker_ts, htt_ts) AS "window",
last_value(row_id) over (partition by "pid", "ts") AS "ts_row"
FROM (
SELECT
"row_id",
"pid",
"ts",
MAX("sbp_measurement_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_1") AS "sbp_measurement_datetime",
MAX("sbp_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_2") AS "sbp_value_as_number",
MAX("sbp_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_3") AS "sbp_ts",
MAX("dbp_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_4") AS "dbp_ts",
MAX("tc_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_5") AS "tc_value_as_number",
MAX("tc_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_6") AS "tc_ts",
MAX("hdl_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_7") AS "hdl_value_as_number",
MAX("hdl_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_8") AS "hdl_ts",
MAX("ldl_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_9") AS "ldl_ts",
MAX("diabetes_condition_concept_id") OVER (PARTITION BY "pid", "..dbplyr_partion_10") AS "diabetes_condition_concept_id",
MAX("diabetes_condition_start_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_11") AS "diabetes_condition_start_datetime",
MAX("diabetes_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_12") AS "diabetes_ts",
MAX("smoker_observation_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_13") AS "smoker_observation_datetime",
MAX("smoker_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_14") AS "smoker_ts",
MAX("htt_drug_concept_id") OVER (PARTITION BY "pid", "..dbplyr_partion_15") AS "htt_drug_concept_id",
MAX("htt_drug_exposure_start_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_16") AS "htt_drug_exposure_start_datetime",
MAX("htt_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_17") AS "htt_ts",
MAX("person_gender_concept_id") OVER (PARTITION BY "pid", "..dbplyr_partion_18") AS "person_gender_concept_id",
MAX("person_birth_datetime") OVER (PARTITION BY "pid", "..dbplyr_partion_19") AS "person_birth_datetime",
MAX("person_race_concept_id") OVER (PARTITION BY "pid", "..dbplyr_partion_20") AS "person_race_concept_id",
MAX("person_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_21") AS "person_ts"
FROM (
SELECT
*,
SUM(CASE WHEN (("sbp_measurement_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_1",
SUM(CASE WHEN (("sbp_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_2",
SUM(CASE WHEN (("sbp_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_3",
SUM(CASE WHEN (("dbp_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_4",
SUM(CASE WHEN (("tc_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_5",
SUM(CASE WHEN (("tc_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_6",
SUM(CASE WHEN (("hdl_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_7",
SUM(CASE WHEN (("hdl_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_8",
SUM(CASE WHEN (("ldl_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_9",
SUM(CASE WHEN (("diabetes_condition_concept_id" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_10",
SUM(CASE WHEN (("diabetes_condition_start_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_11",
SUM(CASE WHEN (("diabetes_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_12",
SUM(CASE WHEN (("smoker_observation_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_13",
SUM(CASE WHEN (("smoker_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_14",
SUM(CASE WHEN (("htt_drug_concept_id" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_15",
SUM(CASE WHEN (("htt_drug_exposure_start_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_16",
SUM(CASE WHEN (("htt_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_17",
SUM(CASE WHEN (("person_gender_concept_id" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_18",
SUM(CASE WHEN (("person_birth_datetime" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_19",
SUM(CASE WHEN (("person_race_concept_id" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_20",
SUM(CASE WHEN (("person_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_21"
FROM (
SELECT
row_number() over () AS "row_id",
"pid",
"ts",
last_value(case when "name" = 'lo2twybvr57d' then "measurement_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "sbp_measurement_datetime",
last_value(case when "name" = 'lo2twybvr57d' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "sbp_value_as_number",
last_value(case when "name" = 'lo2twybvr57d' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "sbp_ts",
last_value(case when "name" = 'd6to5qbarhpm' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "dbp_ts",
last_value(case when "name" = 'bz7dh91stmp8' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "tc_value_as_number",
last_value(case when "name" = 'bz7dh91stmp8' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "tc_ts",
last_value(case when "name" = 'u7r239afhxpw' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "hdl_value_as_number",
last_value(case when "name" = 'u7r239afhxpw' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "hdl_ts",
last_value(case when "name" = 'qvoki51zxjec' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "ldl_ts",
last_value(case when "name" = 'oy6pqib2e8z0' then "condition_concept_id" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "diabetes_condition_concept_id",
last_value(case when "name" = 'oy6pqib2e8z0' then "condition_start_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "diabetes_condition_start_datetime",
last_value(case when "name" = 'oy6pqib2e8z0' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "diabetes_ts",
last_value(case when "name" = 'bl9a1q2btixo' then "observation_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "smoker_observation_datetime",
last_value(case when "name" = 'bl9a1q2btixo' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "smoker_ts",
last_value(case when "name" = 'ufc6yd4x15ju' then "drug_concept_id" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "htt_drug_concept_id",
last_value(case when "name" = 'ufc6yd4x15ju' then "drug_exposure_start_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "htt_drug_exposure_start_datetime",
last_value(case when "name" = 'ufc6yd4x15ju' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "htt_ts",
last_value(case when "name" = 'vpidcysg793u' then "gender_concept_id" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "person_gender_concept_id",
last_value(case when "name" = 'vpidcysg793u' then "birth_datetime" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "person_birth_datetime",
last_value(case when "name" = 'vpidcysg793u' then "race_concept_id" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "person_race_concept_id",
last_value(case when "name" = 'vpidcysg793u' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and 0 preceding) AS "person_ts"
FROM (
(
SELECT
*,
NULL AS "gender_concept_id",
NULL AS "birth_datetime",
NULL AS "race_concept_id"
FROM (
(
SELECT
*,
NULL AS "drug_concept_id",
NULL AS "drug_exposure_start_datetime"
FROM (
(
SELECT *, NULL AS "observation_datetime"
FROM (
(
SELECT
*,
NULL AS "condition_concept_id",
NULL AS "condition_start_datetime"
FROM (
(
(
(
(
SELECT
'lo2twybvr57d' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts",
"measurement_datetime",
"value_as_number"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3004249.0)
)
UNION ALL
(
SELECT
*,
NULL AS "measurement_datetime",
NULL AS "value_as_number"
FROM (
SELECT
'd6to5qbarhpm' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3012888.0)
) "q01"
)
)
UNION ALL
(
SELECT
"name",
"pid",
"ts",
NULL AS "measurement_datetime",
"value_as_number"
FROM (
SELECT
'bz7dh91stmp8' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts",
"value_as_number"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3027114.0)
) "q02"
)
)
UNION ALL
(
SELECT
"name",
"pid",
"ts",
NULL AS "measurement_datetime",
"value_as_number"
FROM (
SELECT
'u7r239afhxpw' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts",
"value_as_number"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3007070.0)
) "q03"
)
)
UNION ALL
(
SELECT
*,
NULL AS "measurement_datetime",
NULL AS "value_as_number"
FROM (
SELECT
'qvoki51zxjec' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3009966.0)
) "q04"
)
) "q05"
)
UNION ALL
(
SELECT
"name",
"pid",
"ts",
NULL AS "measurement_datetime",
NULL AS "value_as_number",
"condition_concept_id",
"condition_start_datetime"
FROM (
SELECT
'oy6pqib2e8z0' AS "name",
"person_id" AS "pid",
"condition_start_datetime" AS "ts",
"condition_concept_id",
"condition_start_datetime"
FROM "cdm_new_york3"."condition_occurrence"
WHERE ("condition_concept_id" = 201826.0)
) "q06"
)
) "q07"
)
UNION ALL
(
SELECT
"name",
"pid",
"ts",
NULL AS "measurement_datetime",
NULL AS "value_as_number",
NULL AS "condition_concept_id",
NULL AS "condition_start_datetime",
"observation_datetime"
FROM (
SELECT
'bl9a1q2btixo' AS "name",
"person_id" AS "pid",
"observation_datetime" AS "ts",
"observation_datetime"
FROM "cdm_new_york3"."observation"
WHERE ("observation_concept_id" IN (42709996.0))
) "q08"
)
) "q09"
)
UNION ALL
(
SELECT
"name",
"pid",
"ts",
NULL AS "measurement_datetime",
NULL AS "value_as_number",
NULL AS "condition_concept_id",
NULL AS "condition_start_datetime",
NULL AS "observation_datetime",
"drug_concept_id",
"drug_exposure_start_datetime"
FROM (
SELECT
'ufc6yd4x15ju' AS "name",
"person_id" AS "pid",
"drug_exposure_start_datetime" AS "ts",
"drug_concept_id",
"drug_exposure_start_datetime"
FROM (
SELECT
"drug_exposure_id",
"person_id",
"drug_concept_id",
"drug_exposure_start_date",
"drug_exposure_start_datetime",
"drug_exposure_end_date",
"drug_exposure_end_datetime",
"verbatim_end_date",
"drug_type_concept_id",
"stop_reason",
"refills",
"quantity",
"days_supply",
"sig",
"route_concept_id",
"lot_number",
"provider_id",
"visit_occurrence_id",
"visit_detail_id",
"drug_source_value",
"drug_source_concept_id",
"route_source_value",
"dose_unit_source_value"
FROM "cdm_new_york3"."drug_exposure" AS "LHS"
INNER JOIN (
SELECT "descendant_concept_id"
FROM (
SELECT
"ancestor_concept_id",
"descendant_concept_id",
"min_levels_of_separation",
"max_levels_of_separation"
FROM "cdm_new_york3"."concept_ancestor" AS "LHS"
INNER JOIN (
SELECT "concept_id"
FROM "cdm_new_york3"."concept"
WHERE ("concept_code" IN ('C02', 'C03', 'C07', 'C08', 'C09') AND "vocabulary_id" = 'ATC')
) "RHS"
ON ("LHS"."ancestor_concept_id" = "RHS"."concept_id")
) "q10"
) "RHS"
ON ("LHS"."drug_concept_id" = "RHS"."descendant_concept_id")
) "q11"
) "q12"
)
) "q13"
)
UNION ALL
(
SELECT
"name",
"pid",
"ts",
NULL AS "measurement_datetime",
NULL AS "value_as_number",
NULL AS "condition_concept_id",
NULL AS "condition_start_datetime",
NULL AS "observation_datetime",
NULL AS "drug_concept_id",
NULL AS "drug_exposure_start_datetime",
"gender_concept_id",
"birth_datetime",
"race_concept_id"
FROM (
SELECT
'vpidcysg793u' AS "name",
"person_id" AS "pid",
"birth_datetime" AS "ts",
"gender_concept_id",
"birth_datetime",
"race_concept_id"
FROM "cdm_new_york3"."person"
) "q14"
)
) "q15"
) "q16"
) "q17"
) "q18"
) "q19"
WHERE ("row_id" = "ts_row" AND sbp_ts is not null and dbp_ts is not null and tc_ts is not null and hdl_ts is not null and ldl_ts is not null and person_ts is not null)
) "q20"
) "q21"
) "q22"
) "q23"
) "q24"
) "q25"